package com.wsh.crawler.service.impl.parser;

import com.wsh.crawler.entity.News;
import org.jsoup.nodes.Document;

import java.util.List;

/**
 * Base class for news parsers.
 *
 * <p>It stores the source name passed to the constructor and declares the
 * parsing and extraction operations that every concrete parser must provide:
 * reading article links from a list page, parsing an article detail page, and
 * extracting image, video and cover-image links from a document.
 *
 * <p>NOTE(review): nothing in this class specifies whether implementations may
 * return {@code null} or must return empty collections — confirm against the
 * concrete parsers and their callers.
 */
public abstract class AbstractNewsParser {

    /**
     * Name of the source this parser is associated with, as supplied to the
     * constructor. Accessible to subclasses; this class itself never reads it.
     */
    protected String sourceName;

    /**
     * Creates a parser and records the given source name.
     *
     * @param sourceName value stored in {@link #sourceName}; it is stored as-is,
     *                   without validation
     */
    public AbstractNewsParser(String sourceName) {
        this.sourceName = sourceName;
    }

    /**
     * Parses a list page and obtains the article links it contains.
     *
     * @param doc the parsed list page
     * @return the article links found on the page
     */
    public abstract List<String> parseArticleLinks(Document doc);

    /**
     * Parses an article detail page.
     *
     * @param doc the parsed article detail page
     * @param url the URL associated with the page (presumably the address it
     *            was fetched from — confirm against callers)
     * @return the {@link News} built from the page
     */
    public abstract News parseArticle(Document doc, String url);

    /**
     * Extracts image links from a document.
     *
     * @param doc the parsed document to inspect
     * @return the image links found
     */
    protected abstract List<String> extractImageUrls(Document doc);

    /**
     * Extracts video links from a document.
     *
     * @param doc the parsed document to inspect
     * @return the video links found
     */
    protected abstract List<String> extractVideoUrls(Document doc);

    /**
     * Extracts the cover image link.
     *
     * @param doc            the parsed document to inspect
     * @param otherImageUrls additional image links made available to the
     *                       implementation; NOTE(review): presumably candidates
     *                       or a fallback for the cover image — verify against
     *                       the concrete parsers
     * @return the cover image link
     */
    public abstract String extractCoverImage(Document doc, List<String> otherImageUrls);
}